# SET DIRECTORY TO SOURCE FILE 

# Divert console output to the Table 4b log file for the rest of the script;
# the bare sink() call on the last line of the script ends the diversion.
sink("../../tables/table4b_log.txt") #create log 

library(tidyverse)
library(plyr)
library(groundhog)
groundhog.library('apsrtable','2019-01-01',tolerate.R.version='4.1.2')

# Effect of heuristics on MC approval

# Previous version also only used within-subject variation in control group.
# Load the cleaned wide-format survey file.
dat <- read.csv(
  '../../data/cleaned/heuristics_cleaned_wide.csv',
  stringsAsFactors = FALSE
)

# 0/1 flag: respondent's political_party value is above 4
dat$participant_republican <- ifelse(dat$political_party > 4, 1, 0)

# 0/1 flag: the MC's one-letter party code is 'R'
dat$MC_republican <- ifelse(dat$reppartyoneletter == 'R', 1, 0)

# TRUE when the respondent flag and the MC flag take the same value
dat$participant_MC_party_match <-
  dat$participant_republican == dat$MC_republican
#summary(lm(mcratingscale ~ participant_MC_party_match, dat)) # sanity checking

# Our dependent variable: the respondents' favorability toward their Member of Congress
#sd(dat$mcratingscale)

# Data at the level of votes/SIGs potentially shown
votes <- read.csv(
  '../../data/Ancillary Data/vote_and_group_info_map.csv',
  stringsAsFactors = FALSE
)

# Preliminary step: split the interest groups (SIGs) by ideological side.
# The liberal groups are listed by hand; every other sig_name that appears in
# `votes` is treated as conservative.
liberal.groups <- c(
  'AFL-CIO',
  'NARAL Pro-Choice America',
  'Human Rights Campaign',
  'League of Conservation Voters',
  'National Parks Conservation Association',
  'National Active and Retired Federal Employees Association'
)
conservative.groups <- setdiff(votes$sig_name, liberal.groups)
#liberal.groups
#conservative.groups

# Iterate over votes to create new columns in dat for each vote possibly shown
# For every vote/SIG pair listed in `votes`, derive the per-vote columns in
# `dat`; for the one SIG rating recorded in heur_randomrating_sig_id, copy the
# derived values into the two *_vote_shown columns initialised here.
dat$sig_rating_supportive_vote_shown <- NA
dat$sig_signal_matches_voter_vote_shown <- NA

# seq_len() rather than 1:nrow(): an empty `votes` table then skips the loop
# instead of iterating over c(1, 0).
for(i in seq_len(nrow(votes))) {
  sig_id <- votes$sig_id[i]
  sig_name <- votes$sig_name[i]
  house_vote_id <- votes$house_vote_id[i]
  stopifnot(sig_name %in% c(liberal.groups, conservative.groups))
  # +1 when the group is in conservative.groups, -1 otherwise
  sig.conservative <- if (sig_name %in% conservative.groups) 1 else -1

  # Names of the columns read or written for this vote/SIG pair
  col.actual.vote <- paste0('repactualvote', house_vote_id)
  col.own.view <- paste0('ownview', house_vote_id)
  col.perceived.vote <- paste0('repvoteperc', house_vote_id)
  col.rating <- paste0('reprating', sig_id)
  col.bill.pref <- paste0('bill_pref_conservative', house_vote_id)
  col.supportive <- paste0('sig_rating_supportive_', sig_id)
  col.signal <- paste0('sig_signal_conservative_', sig_id)
  col.matches <- paste0('sig_signal_matches_voter_', sig_id)

  # If house vote is missing, need to set rating to NA since it would not be shown
  dat[is.na(dat[, col.actual.vote]), col.rating] <- NA

  # Does the respondent have a conservative bill preference on this bill?
  # All the bills are conservative except the prohibit use of funds bill, 56612.
  dat[, col.bill.pref] <- recode(dat[, col.own.view], `0` = -1, `1` = 1) # recode 0 to -1
  if (house_vote_id == 56612) {
    dat[, col.bill.pref] <- -1 * dat[, col.bill.pref]
  }

  # Did the SIG give a positive or negative rating to the MC?
  # Ratings below 50 are coded -1; ratings of 50 and above are coded 1.
  dat[, col.supportive] <- ifelse(dat[, col.rating] < 50, -1, 1)

  # Code the signal such that 1 means the SIG rating sent a conservative signal
  # about the MC's vote and -1 means it sent a liberal signal
  dat[, col.signal] <- dat[, col.supportive] * sig.conservative

  # sig_signal_matches_voter: does the ideological direction of the respondent's
  # preference on the issue match the direction of the signal sent by the rating?
  dat[, col.matches] <- dat[, col.bill.pref] * dat[, col.signal]

  # Change both from -1/1 to 0/1 for easier interpretation
  dat[, col.supportive] <- as.numeric(dat[, col.supportive] == 1)
  dat[, col.matches] <- as.numeric(dat[, col.matches] == 1)

  # If this was the rating that was shown, load it into the *_vote_shown variables
  this.vote.shown <- which(dat$heur_randomrating_sig_id == sig_id)
  dat$sig_rating_supportive_vote_shown[this.vote.shown] <-
    dat[this.vote.shown, col.supportive]
  dat$sig_signal_matches_voter_vote_shown[this.vote.shown] <-
    dat[this.vote.shown, col.matches]

  # For outsheeting for stata for 3b:
  # thinks_rep_agrees: the respondent's own view equals the vote they perceive
  # their representative to have cast
  dat[, paste0('thinks_rep_agrees_', house_vote_id)] <-
    as.numeric(dat[, col.own.view] == dat[, col.perceived.vote])

  # As a control, add whether the own view equals the representative's actual vote
  dat[, paste0('rep_actually_agrees_', house_vote_id)] <-
    as.numeric(dat[, col.own.view] == dat[, col.actual.vote])
}


# Control for average values of treatment within version of survey shown (a or b)
# Placeholders for the within-version averages and for the share of SIGs
# falling in each (supportive = s, matches voter = m) arm.
dat$sig_rating_supportive_avg <- NA
dat$sig_signal_matches_voter_avg <- NA
dat[c('prob.s0m0', 'prob.s0m1', 'prob.s1m0', 'prob.s1m1')] <- NA

for(version in c('a', 'b')) {
  sig.ids <- votes$sig_id[votes$split == version]

  # Rows for respondents who saw this survey version, and their 0/1 indicator
  # columns for the SIGs belonging to that version
  in.version <- dat$heur_survey_version == version
  supportive.cols <- dat[in.version, paste0('sig_rating_supportive_', sig.ids)]
  matches.cols <- dat[in.version, paste0('sig_signal_matches_voter_', sig.ids)]

  # Share of (non-missing) SIGs in each of the four arms, rounded to 5 places
  for(s in c(0,1)) {
    for(m in c(0,1)) {
      in.arm <- as.matrix(supportive.cols == s) * as.matrix(matches.cols == m)
      dat[in.version, paste0('prob.s', s, 'm', m)] <-
        round(rowMeans(in.arm, na.rm = TRUE), 5)
    }
  }

  # Row averages of the two indicators across this version's SIGs
  dat$sig_rating_supportive_avg[in.version] <-
    rowMeans(supportive.cols, na.rm = TRUE)
  dat$sig_signal_matches_voter_avg[in.version] <-
    rowMeans(matches.cols, na.rm = TRUE)
}

# The four arm shares must add up to 1 for every respondent
stopifnot(round(dat$prob.s0m0 + dat$prob.s0m1 + dat$prob.s1m0 + dat$prob.s1m1, 4) == 1)
# Fixed-effect cell id: respondents with the same four arm shares
# (prob.s0m0, prob.s0m1, prob.s1m0, prob.s1m1) share a factor level.
dat <- dat %>%
  mutate(sameprob.fes = factor(group_indices(., prob.s0m0, prob.s0m1, prob.s1m0, prob.s1m1)))

# Determine which fixed effect cells contribute to identification in horserace regressions
dat <- dat %>%
  mutate(prob4max = pmax(prob.s0m0, prob.s0m1, prob.s1m0, prob.s1m1),
         prob.s0 = prob.s0m0 + prob.s0m1,
         prob.m0 = prob.s0m0 + prob.s1m0,
         probs.nonzero = prob.s0 > 0 & prob.s0 < 1 & prob.m0 > 0 & prob.m0 < 1) %>%
  group_by(sameprob.fes) %>%
  # n.in.fe.cell = number of respondents in the same fixed-effect cell
  add_tally(name = 'n.in.fe.cell') %>%
  # Drop the grouping again so `dat` does not stay grouped for the remainder
  # of the script; the flags below are plain row-wise comparisons.
  ungroup() %>%
  mutate(
    # Cell has at least two respondents and no single arm has share (almost) 1
    is.used.in.horserace.identification = n.in.fe.cell >= 2 & prob4max < .9999999,
    # Strict version removes cases that only contribute to identification for one of the two coefficients
    is.used.in.horserace.identification.strict = is.used.in.horserace.identification & probs.nonzero)

## Regress respondents' support for their member of Congress on the proportion of votes on which they agree, and whether the SIG heuristic suggests policy preference congruence
# fixed effects for sig_signal_matches_voter_avg
# Names of all columns whose name starts with 'control_'; these are added to
# the right-hand side of every regression.
controls <- as.character(tidyselect::vars_select(names(dat), starts_with('control_')))


# DV = number of issues on which the person thinks they agree with their MC:
# the row mean of the thinks_rep_agrees_* indicators times the number of
# non-missing indicators.
dat$mean_thinks_rep_agrees <- rowMeans(dat[,startsWith(names(dat), 'thinks_rep_agrees_')], na.rm = TRUE)
dat$num_issue_qs <- rowSums(!is.na(dat[,startsWith(names(dat), 'thinks_rep_agrees_')]))
dat$mean_thinks_rep_agrees <- with(dat, mean_thinks_rep_agrees * num_issue_qs)

# Control variable = average of whether they actually agree.
# The statement previously ended in a dangling `*` whose right operand
# (dat$num_issue_qs) was commented out, so the parser consumed the write.csv()
# call below as the operand and the assignment failed. The scaling stays
# disabled here, matching the commented-out line; restore `* dat$num_issue_qs`
# if a count rather than an average is wanted.
dat$actually_agrees_w_rep <- rowMeans(
  dat[,startsWith(names(dat), 'rep_actually_agrees_')],
  na.rm = TRUE
)

# Export the augmented data, now including actually_agrees_w_rep
write.csv(dat, '../../data/cleaned/heuristics_4b.csv', row.names = FALSE)


# Regressions
# Each specification regresses mcratingscale on its treatment term(s), a set of
# fixed effects, and all `controls`. The three formulas are built once and
# reused; reformulate() joins the term labels with `+` and, unlike pasting
# strings, still yields a valid formula if `controls` is empty.
fml_agreement <- reformulate(
  c('sig_signal_matches_voter_vote_shown',
    'factor(sig_signal_matches_voter_avg)',
    controls),
  response = 'mcratingscale'
)
fml_naive <- reformulate(
  c('sig_rating_supportive_vote_shown',
    'as.factor(sig_rating_supportive_avg)',
    controls),
  response = 'mcratingscale'
)
# Horserace with interaction of FEs (one level per distinct set of arm shares)
fml_horserace <- reformulate(
  c('sig_signal_matches_voter_vote_shown',
    'sig_rating_supportive_vote_shown',
    'factor(sameprob.fes)',
    controls),
  response = 'mcratingscale'
)

# Full-sample models
m_agreement_fe <- lm(fml_agreement, dat)
#summary(m_agreement_fe)

m_naive_fe <- lm(fml_naive, dat)
#summary(m_naive_fe)

m_horserace_fe <- lm(fml_horserace, dat)
#summary(m_horserace_fe)

# Same specifications restricted to rows flagged as contributing to
# identification in the horserace regression
m_agreement_fe.withusedinfinal <- lm(
  fml_agreement, dat,
  subset = is.used.in.horserace.identification
)
#summary(m_agreement_fe.withusedinfinal)

m_naive_fe.withusedinfinal <- lm(
  fml_naive, dat,
  subset = is.used.in.horserace.identification
)

m_horserace_fe.withusedinfinal <- lm(
  fml_horserace, dat,
  subset = is.used.in.horserace.identification
)

# Strict subset: rows flagged by is.used.in.horserace.identification.strict
m_horserace_fe.withusedinfinal.strict <- lm(
  fml_horserace, dat,
  subset = is.used.in.horserace.identification.strict
)

# Table 4b
# Print the regression table for five of the fitted models. The two
# *.withusedinfinal variants of the agreement and naive models are fitted
# earlier in the script but are not included here.
apsrtable::apsrtable(m_agreement_fe, m_naive_fe, m_horserace_fe,
                     m_horserace_fe.withusedinfinal, m_horserace_fe.withusedinfinal.strict,
                     # Each expression greps the coefficient names: terms whose
                     # name contains "factor" (the fixed effects) or "control_"
                     # (the control variables) are selected for omission.
                     omitcoef = list(
                       expression(grep("factor",coefnames)),
                       expression(grep("control_",coefnames))
                     ),
                     stars = 'default')
# End the output diversion opened by sink("../../tables/table4b_log.txt") at
# the top of the script.
sink()